{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Checking and describing the generated data\n",
    "\n",
    "It is always beneficial to add a notebook that takes a quick look at the data, to help you remember which data you collected and to check whether it actually looks correct."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from algbench import describe, read_as_pandas, Benchmark\n",
    "from _conf import EXPERIMENT_DATA"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "An entry in the database can look like this:\n",
      "_____________________________________________\n",
      " result:\n",
      "| num_nodes: 264\n",
      "| lower_bound: 49134.999999999985\n",
      "| objective: 49134.99999999999\n",
      " timestamp: 2023-11-18T13:03:38.267844\n",
      " runtime: 7.737646818161011\n",
      " stdout: [[0.018755435943603516, 'Set parameter Username\\n'], [0.020169496536254883, '...\n",
      " stderr: []\n",
      " logging: [{'name': 'Evaluation', 'msg': 'Building model.', 'args': [], 'levelname': 'I...\n",
      " env_fingerprint: dc522e14e66ed51d57852d501a70f277a707ab31\n",
      " args_fingerprint: e0081c8b67d95fc78986cc58ecae061c7c08b64a\n",
      " parameters:\n",
      "| func: run_solver\n",
      "| args:\n",
      "|| instance_name: pr264\n",
      "|| time_limit: 90\n",
      "|| strategy: GurobiTspSolver\n",
      "|| opt_tol: 0.001\n",
      " argv: ['/ibr/home/krupke/anaconda3/envs/mo310/lib/python3.10/site-packages/slurmina...\n",
      " env:\n",
      "| hostname: algra02\n",
      "| python_version: 3.10.13 (main, Sep 11 2023, 13:44:35) [GCC 11.2.0]\n",
      "| python: /ibr/home/krupke/anaconda3/envs/mo310/bin/python3\n",
      "| cwd: /misc/ibr/home/krupke/cpsat-primer/examples/tsp_evaluation_tsplib\n",
      "| git_revision: 3a4433159072e6a3db4b4ff7c268d392e6e70732\n",
      "| python_file: /ibr/home/krupke/anaconda3/envs/mo310/lib/python3.10/site-packages/slurminade...\n",
      "______________________________________________\n",
      "Note that this is only based on the first entry, other entries could differ.\n"
     ]
    }
   ],
   "source": [
    "# Print a summary of the first database entry as a reminder of which fields were recorded.\n",
    "describe(EXPERIMENT_DATA)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>instance_name</th>\n",
       "      <th>num_nodes</th>\n",
       "      <th>time_limit</th>\n",
       "      <th>strategy</th>\n",
       "      <th>opt_tol</th>\n",
       "      <th>runtime</th>\n",
       "      <th>objective</th>\n",
       "      <th>lower_bound</th>\n",
       "      <th>opt_gap</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>300</th>\n",
       "      <td>att48</td>\n",
       "      <td>48</td>\n",
       "      <td>90</td>\n",
       "      <td>GurobiTspSolver</td>\n",
       "      <td>0.001</td>\n",
       "      <td>0.094591</td>\n",
       "      <td>33522.0</td>\n",
       "      <td>33522.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>301</th>\n",
       "      <td>att48</td>\n",
       "      <td>48</td>\n",
       "      <td>90</td>\n",
       "      <td>GurobiTspSolver</td>\n",
       "      <td>0.010</td>\n",
       "      <td>0.064021</td>\n",
       "      <td>33522.0</td>\n",
       "      <td>33431.0</td>\n",
       "      <td>0.002722</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>302</th>\n",
       "      <td>att48</td>\n",
       "      <td>48</td>\n",
       "      <td>90</td>\n",
       "      <td>GurobiTspSolver</td>\n",
       "      <td>0.050</td>\n",
       "      <td>0.049895</td>\n",
       "      <td>33522.0</td>\n",
       "      <td>33431.0</td>\n",
       "      <td>0.002722</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>303</th>\n",
       "      <td>att48</td>\n",
       "      <td>48</td>\n",
       "      <td>90</td>\n",
       "      <td>GurobiTspSolver</td>\n",
       "      <td>0.100</td>\n",
       "      <td>0.034748</td>\n",
       "      <td>36559.0</td>\n",
       "      <td>33059.0</td>\n",
       "      <td>0.105871</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>304</th>\n",
       "      <td>att48</td>\n",
       "      <td>48</td>\n",
       "      <td>90</td>\n",
       "      <td>GurobiTspSolver</td>\n",
       "      <td>0.250</td>\n",
       "      <td>0.022031</td>\n",
       "      <td>36559.0</td>\n",
       "      <td>31669.0</td>\n",
       "      <td>0.154410</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>155</th>\n",
       "      <td>pr439</td>\n",
       "      <td>439</td>\n",
       "      <td>90</td>\n",
       "      <td>CpSatTspSolverMtz</td>\n",
       "      <td>0.001</td>\n",
       "      <td>185.639632</td>\n",
       "      <td>834126.0</td>\n",
       "      <td>93868.0</td>\n",
       "      <td>7.886159</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>156</th>\n",
       "      <td>pr439</td>\n",
       "      <td>439</td>\n",
       "      <td>90</td>\n",
       "      <td>CpSatTspSolverMtz</td>\n",
       "      <td>0.010</td>\n",
       "      <td>183.571639</td>\n",
       "      <td>742170.0</td>\n",
       "      <td>93833.0</td>\n",
       "      <td>6.909477</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>157</th>\n",
       "      <td>pr439</td>\n",
       "      <td>439</td>\n",
       "      <td>90</td>\n",
       "      <td>CpSatTspSolverMtz</td>\n",
       "      <td>0.050</td>\n",
       "      <td>186.742688</td>\n",
       "      <td>866567.0</td>\n",
       "      <td>93833.0</td>\n",
       "      <td>8.235205</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>158</th>\n",
       "      <td>pr439</td>\n",
       "      <td>439</td>\n",
       "      <td>90</td>\n",
       "      <td>CpSatTspSolverMtz</td>\n",
       "      <td>0.100</td>\n",
       "      <td>183.098925</td>\n",
       "      <td>780813.0</td>\n",
       "      <td>93749.0</td>\n",
       "      <td>7.328761</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>159</th>\n",
       "      <td>pr439</td>\n",
       "      <td>439</td>\n",
       "      <td>90</td>\n",
       "      <td>CpSatTspSolverMtz</td>\n",
       "      <td>0.250</td>\n",
       "      <td>129.042056</td>\n",
       "      <td>809226.0</td>\n",
       "      <td>93783.0</td>\n",
       "      <td>7.628707</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>560 rows × 9 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "    instance_name  num_nodes  time_limit           strategy  opt_tol  \\\n",
       "300         att48         48          90    GurobiTspSolver    0.001   \n",
       "301         att48         48          90    GurobiTspSolver    0.010   \n",
       "302         att48         48          90    GurobiTspSolver    0.050   \n",
       "303         att48         48          90    GurobiTspSolver    0.100   \n",
       "304         att48         48          90    GurobiTspSolver    0.250   \n",
       "..            ...        ...         ...                ...      ...   \n",
       "155         pr439        439          90  CpSatTspSolverMtz    0.001   \n",
       "156         pr439        439          90  CpSatTspSolverMtz    0.010   \n",
       "157         pr439        439          90  CpSatTspSolverMtz    0.050   \n",
       "158         pr439        439          90  CpSatTspSolverMtz    0.100   \n",
       "159         pr439        439          90  CpSatTspSolverMtz    0.250   \n",
       "\n",
       "        runtime  objective  lower_bound   opt_gap  \n",
       "300    0.094591    33522.0      33522.0  0.000000  \n",
       "301    0.064021    33522.0      33431.0  0.002722  \n",
       "302    0.049895    33522.0      33431.0  0.002722  \n",
       "303    0.034748    36559.0      33059.0  0.105871  \n",
       "304    0.022031    36559.0      31669.0  0.154410  \n",
       "..          ...        ...          ...       ...  \n",
       "155  185.639632   834126.0      93868.0  7.886159  \n",
       "156  183.571639   742170.0      93833.0  6.909477  \n",
       "157  186.742688   866567.0      93833.0  8.235205  \n",
       "158  183.098925   780813.0      93749.0  7.328761  \n",
       "159  129.042056   809226.0      93783.0  7.628707  \n",
       "\n",
       "[560 rows x 9 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def entry_to_row(entry):\n",
    "    \"\"\"Flatten one benchmark entry into the columns inspected in this notebook.\"\"\"\n",
    "    args = entry[\"parameters\"][\"args\"]\n",
    "    result = entry[\"result\"]\n",
    "    return {\n",
    "        \"instance_name\": args[\"instance_name\"],\n",
    "        \"num_nodes\": result[\"num_nodes\"],\n",
    "        \"time_limit\": args[\"time_limit\"],\n",
    "        \"strategy\": args[\"strategy\"],\n",
    "        \"opt_tol\": args[\"opt_tol\"],\n",
    "        \"runtime\": entry[\"runtime\"],\n",
    "        \"objective\": result[\"objective\"],\n",
    "        \"lower_bound\": result[\"lower_bound\"],\n",
    "    }\n",
    "\n",
    "\n",
    "# Keep the first row per (instance, size, strategy, tolerance) combination and\n",
    "# append the relative gap between objective and lower bound as a new column.\n",
    "t = (\n",
    "    read_as_pandas(EXPERIMENT_DATA, entry_to_row)\n",
    "    .drop_duplicates(subset=[\"instance_name\", \"num_nodes\", \"strategy\", \"opt_tol\"])\n",
    "    .assign(\n",
    "        opt_gap=lambda rows: (rows[\"objective\"] - rows[\"lower_bound\"])\n",
    "        / rows[\"lower_bound\"]\n",
    "    )\n",
    ")\n",
    "t.sort_values([\"num_nodes\", \"instance_name\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Dump the captured solver output of selected runs for manual inspection.\n",
    "# NOTE(review): the saved run of this cell produced no output at all, so this\n",
    "# instance name presumably does not occur in EXPERIMENT_DATA - confirm.\n",
    "inspect_instance = \"random_euclidean_100_1\"\n",
    "inspect_strategy = \"GurobiTspSolver\"\n",
    "num_matches = 0\n",
    "for entry in Benchmark(EXPERIMENT_DATA):\n",
    "    args = entry[\"parameters\"][\"args\"]\n",
    "    if (\n",
    "        args[\"instance_name\"] != inspect_instance\n",
    "        or args[\"strategy\"] != inspect_strategy\n",
    "    ):\n",
    "        continue\n",
    "    num_matches += 1\n",
    "    print(\"=====================================\")\n",
    "    # Each captured record looks like [time, text] (see the describe output);\n",
    "    # only the text part is concatenated here.\n",
    "    stdout = \"\".join(e[1] for e in entry[\"stdout\"])\n",
    "    stderr = \"\".join(e[1] for e in entry[\"stderr\"])\n",
    "    print(stdout)\n",
    "    print(stderr)\n",
    "    if not stdout.strip():\n",
    "        print(\"No stdout\")\n",
    "    print(\"=====================================\")\n",
    "if num_matches == 0:\n",
    "    # Without this notice, a filter that matches nothing is silent and easy to miss.\n",
    "    print(f\"No entries found for {inspect_instance!r} with {inspect_strategy!r}.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Check for errors in the data\n",
    "\n",
    "You always want to check if the results you got are actually feasible. Errors easily happen and are not always visible on the plots.\n",
    "Thus, you want to do some basic checks to detect errors early on. For example, you could accidentally have swapped lower and upper bounds in the data generation process.\n",
    "Depending on your plots, this may not be visible, and you may end up comparing the wrong data and drawing the wrong conclusions.\n",
    "Or, you could have accidentally swapped runtime and objective values, which could look reasonable in the data as the runtime and the objective often increase with the instance size.\n",
    "\n",
    "A very basic check is to verify that the best lower and upper bounds do not contradict each other. Many errors will be caught by this check. However, you often need some tolerance to account for numerical errors."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Rows with missing values are skipped; the remaining gaps must be non-negative\n",
    "# up to a small tolerance.\n",
    "gaps = t.dropna()[\"opt_gap\"]\n",
    "assert gaps.ge(-0.0001).all(), \"Optimality gap is negative!\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>instance_name</th>\n",
       "      <th>num_nodes</th>\n",
       "      <th>time_limit</th>\n",
       "      <th>strategy</th>\n",
       "      <th>opt_tol</th>\n",
       "      <th>runtime</th>\n",
       "      <th>objective</th>\n",
       "      <th>lower_bound</th>\n",
       "      <th>opt_gap</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "Empty DataFrame\n",
       "Columns: [instance_name, num_nodes, time_limit, strategy, opt_tol, runtime, objective, lower_bound, opt_gap]\n",
       "Index: []"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "from IPython.display import display\n",
    "\n",
    "# Always make sure that your results are not trivially wrong\n",
    "#  - e.g. the best lower bound of an instance is higher than its best objective\n",
    "eps = 0.0001  # some tolerance is needed when working with floats.\n",
    "per_instance = t.groupby([\"instance_name\"])\n",
    "max_lb = per_instance[\"lower_bound\"].max()\n",
    "min_obj = per_instance[\"objective\"].min()\n",
    "# abs() keeps the relative tolerance non-negative even if the bounds are negative;\n",
    "# otherwise instances whose bounds merely coincide would be flagged.\n",
    "contradicting = (max_lb - min_obj) > eps * max_lb.abs()\n",
    "bad_instances = max_lb[contradicting].index.to_list()\n",
    "\n",
    "# Show every row of the offending instances before failing, to ease debugging.\n",
    "display(t[t[\"instance_name\"].isin(bad_instances)])\n",
    "assert not bad_instances, f\"Bad instances detected: {bad_instances}\""
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "mo310",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
